# -*- coding: utf-8 -*-
import scrapy
from scrapy.linkextractors import LinkExtractor
from ..items import ExampleItem

class ExamplesSpider(scrapy.Spider):
    """Crawl the matplotlib 2.0.2 examples index and emit source-file URLs.

    ``parse`` handles the index page and schedules one request per example
    page; ``parse_example`` handles each example page and produces an
    ``ExampleItem`` whose ``file_urls`` field holds the example's source URL.
    """

    name = 'examples'
    # Starting point of the crawl.
    allowed_domains = ['matplotlib.org']
    start_urls = ['https://matplotlib.org/2.0.2/examples/index.html']

    def parse(self, response):
        """Parse the example index page and request every example page.

        Links are taken only from the ``div.toctree-wrapper.compound``
        container, and URLs ending in ``/index.html`` are skipped.

        Yields:
            scrapy.Request: one request per extracted link, handled by
            ``parse_example``.
        """
        extractor = LinkExtractor(
            restrict_css='div.toctree-wrapper.compound',
            # Raw string with an escaped dot so only a literal
            # "/index.html" suffix is denied.
            deny=r'/index\.html$',
        )
        # Extract once and reuse the result for both the log line and the loop.
        links = extractor.extract_links(response)
        self.logger.debug(
            'Extracted %d example links from %s', len(links), response.url
        )
        for link in links:
            yield scrapy.Request(link.url, callback=self.parse_example)

    def parse_example(self, response):
        """Parse one example page and return an item with its source URL.

        Returns:
            ExampleItem with ``file_urls`` set to a single absolute URL, or
            ``None`` when the page has no ``a.reference.external`` link.
        """
        href = response.css('a.reference.external::attr(href)').extract_first()
        if not href:
            # Without this guard, urljoin(None) would return the page's own
            # URL and the item would point at the HTML page instead of a
            # source file.
            self.logger.warning(
                'No source link found on example page %s', response.url
            )
            return None

        example = ExampleItem()
        # NOTE(review): ``file_urls`` is presumably consumed by Scrapy's
        # FilesPipeline -- confirm against the project's settings.
        example['file_urls'] = [response.urljoin(href)]
        return example

